Load generic libraries
source('configuration.r')
Load plot specific libraries
library(ggtree)
library(HiveR)
library(magrittr)
library(phytools)
library(forcats)
Merge data
operams.sp.list <- c('Elizabethkingia_anophelis', 'Acinetobacter_baumannii', 'Staphylococcus_aureus', 'Staphylococcus_epidermidis') ## only use opera-ms assemblies for these species
df.canu <- read.table('../tables/genome_info.dat', head=TRUE, stringsAsFactors = FALSE, comment.char = '!', sep='\t')
df.operams <- read.table('../tables/genome_info_opera_ms.dat', head=TRUE, stringsAsFactors = FALSE, comment.char = '!', sep='\t')
df <- bind_rows(filter(df.canu, !Species_name %in% operams.sp.list) %>% mutate(Assembler='Canu'),
filter(df.operams, Species_name %in% operams.sp.list) %>% mutate(Assembler='OPERA-MS'))
meta.illumina <- read.table('../metadata/illumina_metadata.txt', head=TRUE)[,-2]
meta.nanopore <- read.table('../metadata/nanopore.metadata.txt', head=TRUE, sep='\t', strip.white = TRUE)
meta.merged <- merge(meta.nanopore, meta.illumina,
by.x='Illumina_Library_ID',
by.y='Library')
meta.merged <- merge(df, meta.merged,
by='Nanopore_ID') %>%
select(Genome_ID, everything()) %>%
mutate(Sample_type=fct_drop(Sample_type))
colnames(meta.merged)[3] <- 'Species'
## fix names in metadata to be consistent with the manuscript
levels(meta.merged$Sample_type) <- str_replace_all(levels(meta.merged$Sample_type), c('handle-interior'='Handle'))
levels(meta.merged$Room_type) <- str_replace_all(levels(meta.merged$Room_type), c('cubicles'='wards', 'Non-cohort'='Standard'))
levels(meta.merged$Cubicle_room) <- str_replace_all(levels(meta.merged$Cubicle_room), c('Cubicle'='Ward'))
write.table(meta.merged, '../output_tables/merged_assembly_metadata.tsv', quote=F, row.names = F, col.names = T, sep='\t')
meta.merged %<>% filter(timept %in% c(1,2))
colors <- pal_npg('nrc')(8)
Function to plot the tree with heatmap
## function to get cluster data:
get.clusters <- function(x, algo, diff_threshold, dist_type){
dist.file <- paste0("../tables/", x, "_", algo, "_mummer_heatmap_", dist_type, ".dat")
dist.dat <- read.table(dist.file)
meta.fil <- filter(meta.merged, Species==x)
idx <- rownames(dist.dat) %in% (filter(meta.fil, Genome_quality=='HIGH_QUAL'))$Genome_ID | ##High quality genomes
str_detect(rownames(dist.dat), 'G|F') # use ['G|F'] to include the new data
dist.dat <- dist.dat[idx, idx]
## clustering
cluster.full <- hclust(as.dist(dist.dat), method='single')
clusters <- cutree(cluster.full, h=diff_threshold)
## filter patient data
clusters <- clusters[str_detect(names(clusters), 's_')]
merged <- merge(data.frame(clusters), meta.fil, by.x=0, by.y=1) %>%
mutate(strain=paste0('s', clusters), input_distance_matrix=dist.file)
merged
}
## function to plot tree
plot.tree <- function(x, color, offset=0.01, title=NULL, scale_offset_x=0.005, scale_offset_y=0, algo, diff_threshold, dist_type){
merged <- get.clusters(x, algo, diff_threshold, dist_type)
tree.strains <- read.tree(paste0('../tables/trees/', x, ".parsnp.tree"))
tree.strains$tip.label %<>% str_replace_all(c(".trimmed.fasta"="", "nanopore.cons.cluster_"="s", ".ref"=""))
antibiotics <- select(merged, strain, Antibiotics) %>% group_by(strain, Antibiotics) %>%
count() %>% spread(Antibiotics,n,fill=0) %>%
select(-BHI) %>% data.frame(row.names = 'strain')
antibiotics[antibiotics > 0] <- 'D'
p <-
ggtree(midpoint.root(tree.strains), layout="fan", open.angle=45, lwd=1) +
geom_tippoint(size=3, shape=19, col=color) +
geom_tiplab2(size=5, offset=offset/3) +
geom_treescale(x=scale_offset_x, y=scale_offset_y, linesize=0.8, offset=0.1)
p <- gheatmap(p, offset=offset, antibiotics, color='black', colnames_offset_y = 0.3,##colnames_offset_x=10,
colnames_angle = 60, hjust =1, font.size=6.5) + scale_fill_manual(values=c('white', color)) +
theme(legend.position ='none', plot.title = element_text(size = 40, face = "bold.italic")) +
ggtitle(title)
return(p)
}
Save merged strain table (used for generating parsnp trees)
dist_type = "gsnp"
opera_ms_threshold = 0.0001
d1 <- get.clusters('Elizabethkingia_anophelis', 'opera_ms', opera_ms_threshold, dist_type)
d2 <- get.clusters('Staphylococcus_epidermidis', 'opera_ms', opera_ms_threshold, dist_type)
d3 <- get.clusters('Staphylococcus_aureus', 'opera_ms', opera_ms_threshold, dist_type)
d4 <- get.clusters('Acinetobacter_baumannii', 'opera_ms', opera_ms_threshold, dist_type)
#################
canu_threshold = 0.001
d5 <- get.clusters('Pseudomonas_aeruginosa', 'canu', canu_threshold, dist_type)
d6 <- get.clusters('Enterococcus_faecium', 'canu', canu_threshold, dist_type)
d7 <- get.clusters('Enterococcus_faecalis', 'canu', canu_threshold, dist_type)
d8 <- get.clusters('Klebsiella_pneumoniae', 'canu', canu_threshold, dist_type)
dat.all.species <- rbind(d1, d2, d3, d4, d5, d6, d7, d8)
write.table(dat.all.species,
'../output_tables/strain_cluster_summary.tsv', sep='\t', row.names = F, col.names = T, quote=F)
Generate tree plots
g1 <- plot.tree('Elizabethkingia_anophelis', colors[1], 1, 'Elizabethkingia anophelis', 1, 0, 'opera_ms', opera_ms_threshold, dist_type)
g2 <- plot.tree('Staphylococcus_epidermidis', colors[2], 0.08, 'Staphylococcus epidermidis', 0.35, 0, 'opera_ms', opera_ms_threshold, dist_type)
g3 <- plot.tree('Staphylococcus_aureus', colors[3], 0.05, 'Staphylococcus aureus', 0.2, 0, 'opera_ms', opera_ms_threshold, dist_type)
g4 <- plot.tree('Acinetobacter_baumannii', colors[4], 0.2, 'Acinetobacter baumannii', scale_offset_x = 0.3, 0, 'opera_ms', opera_ms_threshold, dist_type)
####
g5 <- plot.tree('Pseudomonas_aeruginosa', colors[5], 0.2, 'Pseudomonas aeruginosa', scale_offset_x = 0.3, 0, 'canu', canu_threshold, dist_type)
g6 <- plot.tree('Enterococcus_faecium', colors[6], 0.9, 'Enterococcus faecium', scale_offset_x = 0.8, 0, 'canu', canu_threshold, dist_type)
g7 <- plot.tree('Enterococcus_faecalis', colors[7], 0.05, 'Enterococcus faecalis', scale_offset_x = 0.2, 0, 'canu', canu_threshold, dist_type)
g8 <- plot.tree('Klebsiella_pneumoniae', colors[8], 0.2, 'Klebsiella pneumoniae', scale_offset_x = 0.5, 0, 'canu', canu_threshold, dist_type)
Main figure part (S. aureus)
g3
Supplementary figure part
s1 <- cowplot::plot_grid(g2, g4, g5, g6, g7, g8, nrow=1)
s1
Generate edge data
rbind(d1, d2, d3, d4, d5, d6, d7, d8) %>%
count(Species, clusters, Sample_type, Room_type, #bed_number,
timept, Sample_ID.y, Cubicle_room) %>%
mutate(clusters=sprintf("%02d", clusters)) %>%
distinct(Species, clusters, Sample_type, Cubicle_room, Room_type, timept) %>%
mutate(label1='Strain', label2='Site', label3='Room') %>%
unite(n1,c(label1, Species, clusters), sep='=', remove=F) %>%
unite(n2,c(label2, Sample_type), sep='=', remove=F) %>%
unite(n3,c(label3, Room_type, Cubicle_room), sep='=',remove=F) %>%
select(-label1, -label2, -label3) -> edge_data
## remove strains only occurred once at one place
edge_data <- filter(edge_data, n1%in%
(edge_data %>% group_by(n1) %>% count %>% filter(n>1))$n1)
Fisher’s exact test for enrichment (multi-resitance vs. persistance)
test.dat <-
rbind(d2, d3, d4, d5, d6, d7, d8) %>%
group_by(Species, clusters, Sample_type, Room_type, bed_number,
timept, Sample_ID.y, Cubicle_room) %>%
summarise(n=sum(Antibiotics!='BHI')) %>%
group_by(Species, clusters) %>% mutate(resistance= max(n)) %>% ungroup %>%
distinct(Species, clusters, resistance, timept) %>%
count(Species, clusters, resistance, name='found')
test <- function(x){
c1 <- nrow(filter(x, resistance>2, found>1))
c2 <- nrow(filter(x, resistance>2, found<=1))
c3 <- nrow(filter(x, resistance<=2, found>1))
c4 <- nrow(x)
fisher.test(matrix(c(c1,c2,c3,c4), 2,2))$p.value
}
sp <- unique(test.dat$Species)
c(sapply(sp, function(x) test(filter(test.dat, Species==x))), All=test(test.dat))
## Acinetobacter_baumannii Enterococcus_faecalis
## 5.000000e-02 3.744539e-03
## Enterococcus_faecium Klebsiella_pneumoniae
## 4.000000e-01 2.666667e-01
## Pseudomonas_aeruginosa Staphylococcus_aureus
## 1.000000e+00 1.578947e-02
## Staphylococcus_epidermidis All
## 2.257700e-03 2.903411e-08
Barplot for fraction of strains that persist
filter(test.dat, found>1) %>%
group_by(Species) %>%
summarise(MDR=sum(resistance>2)/n(), `non-MDR`=1-MDR) %>%
melt() %>%
mutate(Species=str_replace(Species, '[a-z]+_', '. ')) %>%
ggplot(aes(x=Species, y=value, fill=variable)) +
geom_bar(stat='identity') +
labs(y='Fraction of strains') +
theme(axis.text.x = element_text(angle=45, hjust=1, face='bold.italic'), legend.title = element_blank())
## Using Species as id variables
ggsave('../plots/sup12.fraction_of_MDR_strains_persist.svg', width=8, height=5)
Fisher’s exact test for enrichment (time vs. space)
test.dat <-
rbind(d2, d3, d4, d5, d6, d7, d8) %>%
distinct(Species, clusters, Sample_type, Room_type, bed_number,
timept, Sample_ID.y, Cubicle_room) %>%
distinct(Species, clusters, Room_type, bed_number, timept, Cubicle_room) %>%
count(Species, clusters, timept) %>%
group_by(Species, clusters) %>%
summarise(time=length(timept), space=max(n))
test <- function(x){
c1 <- nrow(filter(x, time>1, space>1))
c2 <- nrow(filter(x, time>1, space<=1))
c3 <- nrow(filter(x, time<=1, space>1))
c4 <- nrow(x)
fisher.test(matrix(c(c1,c2,c3,c4), 2,2))$p.value
}
sp <- unique(test.dat$Species)
c(sapply(sp, function(x) test(filter(test.dat, Species==x))), All=test(test.dat))
## Acinetobacter_baumannii Enterococcus_faecalis
## 1.298701e-02 4.901961e-03
## Enterococcus_faecium Klebsiella_pneumoniae
## 2.500000e-01 1.000000e+00
## Pseudomonas_aeruginosa Staphylococcus_aureus
## 1.000000e+00 1.107692e-01
## Staphylococcus_epidermidis All
## 1.266096e-04 1.355971e-09
Hive plot function
hiveplot <- function(species, col, silent=FALSE){
edge_plot <- filter(edge_data ,Species==species)
strain_col <- col
rbind(data.frame(x1=edge_plot$n1, x2=edge_plot$n2, color=edge_plot$timept, stringsAsFactors = F) ,
data.frame(x1=edge_plot$n1, x2=edge_plot$n3, color=edge_plot$timept, stringsAsFactors = F)
) %>%
group_by(x1, x2, color) %>% count() %>%
select(x1, x2, weight=n, color) %>%
arrange(desc(x1,x2)) -> e
hive <- edge2HPD(data.frame(e[,1:3]))
hive$nodes$axis <- as.integer(as.factor(str_split_fixed(hive$nodes$lab, '=', 2)[,1]))
hive$nodes$tag <- str_split_fixed(hive$nodes$lab, '=', 3)[,1]
hive$nodes[hive$nodes$axis==1, ] <- arrange(hive$nodes[hive$nodes$axis==1, ], lab) ## sort the names on the room axis
## species color
hive$nodes$color[ hive$nodes$tag == 'Strain'] <- strain_col
## site color
colors <- sapply(c(pal_simpsons('springfield')(16)), adjustcolor, alpha.f=0.9)
colormap <- data.frame(site=levels((edge_data$Sample_type)), col=pal_npg('nrc')(10)[c(5,7,2,3,10,4,1)], row.names = 1, stringsAsFactors = F)
#colormap <- data.frame(site=levels(edge_data$Sample_type), col=colors[1:9], row.names = 1, stringsAsFactors = F)
site.id <- hive$nodes$tag=='Site'
hive$nodes$color[site.id] <- colormap[str_split_fixed(hive$nodes$lab, '=', 2)[site.id, 2], ]
## room color
colormap <-data.frame(room=unique(edge_data$Room_type), col=colors[c(13,15,16)], row.names = 1, stringsAsFactors = F)
room.id <- hive$nodes$tag=='Room'
hive$nodes$color[room.id] <- colormap[str_split_fixed(hive$nodes$lab[room.id], '=', 3)[,2], ]
hive$nodes$size=2
hive$edges$weight <- hive$edges$weight*3-1
hive$edges$color <- ifelse(e$color<2, '#ff990055','#66ccff55')
#hive$edges$color <- ifelse(e$color<2,'#66ccff77', '#ff330077')
tmp <- data.frame(node.lab=hive$nodes$lab, node.text=hive$nodes$lab, angle=0, radius=0, offset=0, hjust=1, vjust=0.5)
mutate(tmp, node.text=str_replace(node.text, 'Strain=.*=', 'Strain=s')) %>%
separate(col=node.text, into=c('lab','node.text'), '=', extra='merge') %>%
mutate(node.text=str_replace_all(node.text, '_', ' ')) %>%
mutate(node.text=str_replace_all(node.text, '=', ': ')) %>%
mutate(offset=ifelse(lab=='Room' | lab=='Site' , -0.05, -0.03)) %>%
select(-lab) %>%
write.table('tmp_hive.txt', sep=',', quote=T, row.names = F, col.names = T)
if(silent){return(hive)}
plotHive(hive, ch=0.2, method='ranknorm', bkgnd='white', anNodes = 'tmp_hive.txt',
anNode.gpar=gpar(cex=1.5))
}
Main figure part (S. aureus)
hiveplot('Staphylococcus_aureus', colors[3])
aux <- function(){
vplayout <- function(x, y) viewport(layout.pos.row = x, layout.pos.col = y)
grid.newpage()
pushViewport(viewport(layout = grid.layout(1, 6)))
pushViewport(vplayout(1, 1)) # left plot
h <- hiveplot('Staphylococcus_epidermidis', colors[2], silent = TRUE)
plotHive(h, ch=0.2, method='ranknorm', bkgnd='white', anNodes = 'tmp_hive.txt', np=FALSE,anNode.gpar=gpar(cex=1.5))
popViewport(2)
pushViewport(vplayout(1, 2))
h <- hiveplot('Acinetobacter_baumannii', colors[4], silent=TRUE)
plotHive(h, ch=0.2, method='ranknorm', bkgnd='white', anNodes = 'tmp_hive.txt', np=FALSE,anNode.gpar=gpar(cex=1.5))
popViewport(2)
pushViewport(vplayout(1, 3))
h <- hiveplot('Pseudomonas_aeruginosa', colors[5], silent=TRUE) ## only one node -- cannot use 'ranknorm'
plotHive(h, ch=0.2, method='rank', bkgnd='white', anNodes = 'tmp_hive.txt', np=FALSE,anNode.gpar=gpar(cex=1.5))
popViewport(2)
pushViewport(vplayout(1, 4))
h <- hiveplot('Enterococcus_faecium', colors[6], silent=TRUE)
plotHive(h, ch=0.2, method='rank', bkgnd='white', anNodes = 'tmp_hive.txt', np=FALSE,anNode.gpar=gpar(cex=1.5))
popViewport(2)
pushViewport(vplayout(1, 5))
h <- hiveplot('Enterococcus_faecalis', colors[7], silent=TRUE)
plotHive(h, ch=0.2, method='ranknorm', bkgnd='white', anNodes = 'tmp_hive.txt', np=FALSE,anNode.gpar=gpar(cex=1.5))
popViewport(2)
pushViewport(vplayout(1, 6))
h <- hiveplot('Klebsiella_pneumoniae', colors[8], silent=TRUE) ## only one node -- cannot use 'ranknorm'
plotHive(h, ch=0.2, method='ranknorm', bkgnd='white', anNodes = 'tmp_hive.txt', np=FALSE,anNode.gpar=gpar(cex=1.5))
}
aux()
ggsave('../plots/fig3d.tree_main.svg', g3, width=10, height=10)
svg('../plots/fig3d.hive_main.svg', width = 10, height = 10)
hiveplot('Staphylococcus_aureus', colors[3])
dev.off()
## quartz_off_screen
## 2
ggsave('../plots/sup11.tree_sup.png', s1, width=54, height=9, limitsize = F)
png('../plots/sup11.hive_sup.png', width = 200, height = 30, units='cm', res=400)
aux()
dev.off()
## quartz_off_screen
## 2
sessionInfo()
## R version 3.5.1 (2018-07-02)
## Platform: x86_64-apple-darwin15.6.0 (64-bit)
## Running under: macOS 10.15.2
##
## Matrix products: default
## BLAS: /Library/Frameworks/R.framework/Versions/3.5/Resources/lib/libRblas.0.dylib
## LAPACK: /Library/Frameworks/R.framework/Versions/3.5/Resources/lib/libRlapack.dylib
##
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
##
## attached base packages:
## [1] grid stats graphics grDevices utils datasets methods
## [8] base
##
## other attached packages:
## [1] gdtools_0.1.7 forcats_0.3.0 phytools_0.6-99 maps_3.3.0
## [5] ape_5.3 magrittr_1.5 HiveR_0.3.42 ggtree_1.99.1
## [9] ggsci_2.9 reshape2_1.4.3 stringr_1.4.0 tibble_2.1.3
## [13] tidyr_1.0.0 dplyr_0.8.3 gridExtra_2.3 ggplot2_3.2.1
##
## loaded via a namespace (and not attached):
## [1] jsonlite_1.6 gtools_3.8.1
## [3] shiny_1.2.0 assertthat_0.2.1
## [5] expm_0.999-4 BiocManager_1.30.8
## [7] rvcheck_0.1.5 animation_2.6
## [9] yaml_2.2.0 numDeriv_2016.8-1
## [11] pillar_1.4.2 backports_1.1.5
## [13] lattice_0.20-38 glue_1.3.1
## [15] quadprog_1.5-5 phangorn_2.5.5
## [17] digest_0.6.22 manipulateWidget_0.10.0
## [19] RColorBrewer_1.1-2 promises_1.0.1
## [21] colorspace_1.4-1 cowplot_0.9.3
## [23] htmltools_0.3.6 httpuv_1.4.5
## [25] Matrix_1.2-15 plyr_1.8.4
## [27] pkgconfig_2.0.3 purrr_0.3.3
## [29] xtable_1.8-3 tidytree_0.2.8
## [31] scales_1.0.0 webshot_0.5.1
## [33] svglite_1.2.1 jpeg_0.1-8
## [35] later_0.7.5 combinat_0.0-8
## [37] ellipsis_0.3.0 withr_2.1.2
## [39] lazyeval_0.2.2 mnormt_1.5-5
## [41] crayon_1.3.4 mime_0.6
## [43] evaluate_0.12 nlme_3.1-137
## [45] MASS_7.3-51.1 tools_3.5.1
## [47] lifecycle_0.1.0 munsell_0.5.0
## [49] plotrix_3.7-6 compiler_3.5.1
## [51] clusterGeneration_1.3.4 rlang_0.4.0
## [53] tkrgl_0.8 htmlwidgets_1.3
## [55] crosstalk_1.0.0 igraph_1.2.2
## [57] miniUI_0.1.1.1 labeling_0.3
## [59] tcltk_3.5.1 rmarkdown_1.10
## [61] gtable_0.3.0 R6_2.4.0
## [63] knitr_1.20 zeallot_0.1.0
## [65] fastmatch_1.1-0 rprojroot_1.3-2
## [67] treeio_1.9.3 stringi_1.4.3
## [69] parallel_3.5.1 Rcpp_1.0.2
## [71] vctrs_0.2.0 png_0.1-7
## [73] rgl_0.99.16 coda_0.19-2
## [75] scatterplot3d_0.3-41 tidyselect_0.2.5